import os

# API credentials and endpoint for the chat-completions service.
# The key is taken from the SILLICON_API_KEY environment variable when it is set
# (and non-empty), so a real secret never has to be written into this file; the
# placeholder below is only the fallback.
SILLICON_API_KEY = os.environ.get("SILLICON_API_KEY") or "your_silicon_api_key"

# Model identifier sent with each request.
MODEL_NAME = "Pro/deepseek-ai/DeepSeek-V3"
# Alternative model (disabled):
# MODEL_NAME = "moonshotai/Kimi-K2-Instruct-0905"

# Chat-completions endpoint URL.
SILLICON_API_URL = "https://api.siliconflow.cn/v1/chat/completions"

# Social media platform configuration: platform name -> list of associated domains.
SOCIAL_PLATFORMS = dict(
    facebook=["facebook.com", "fb.com"],
    instagram=["instagram.com"],
    linkedin=["linkedin.com"],
    twitter=["twitter.com", "x.com"],
    youtube=["youtube.com", "youtu.be"],
    tiktok=["tiktok.com"],
    whatsapp=["wa.me", "whatsapp.com"],
)

# Phone-number matching patterns, primarily for the United Arab Emirates (+971).
#
# Each international pattern ends with the negative lookahead (?!\d) so a match
# can never stop in the middle of a longer digit run. Without it, the landline
# pattern would match the first 12 characters of a mobile number such as
# "+971501234567" and yield a truncated result. The local patterns already end
# with \b for the same purpose.
PHONE_PATTERNS = [
    # International format, +971 prefix
    r'\+971[\s-]?[2-9][\s-]?\d{7}(?!\d)',     # +971-X-XXXXXXX (landline, area code 2-9)
    r'\+971[\s-]?[2-9][\s-]?\d{4}[\s-]?\d{3}(?!\d)', # +971 6 7119 222 (landline, space-separated)
    r'\+971[\s-]?5[024568][\s-]?\d{7}(?!\d)', # +971-5X-XXXXXXX (mobile, 050/052/054/055/056/058)
    r'\+971[\s-]?5[024568][\s-]?\d{3}[\s-]?\d{4}(?!\d)', # +971 56 430 6852 (mobile, space-separated)
    r'\+971[\s-]?800[\s-]?\d{2,9}(?!\d)',     # +971-800-XXXX (toll-free)
    r'\+971[\s-]?900[\s-]?[02]\d{5}(?!\d)',   # +971-900-XXXXX (premium rate)
    r'\+971[\s-]?700[\s-]?[05]\d{5}(?!\d)',   # +971-700-XXXXX (shared cost)
    # NOTE(review): +968 is Oman's country code, not the UAE's - presumably
    # included on purpose for cross-border contacts; confirm.
    r'\+968[\s-]?[2-9][\s-]?\d{7}(?!\d)',

    # Local format (leading 0)
    r'\b0[2-9][\s-]?\d{7}\b',          # 02-XXXXXXX, 04-XXXXXXX, etc. (landline)
    r'\b05[024568][\s-]?\d{7}\b',      # 050-XXXXXXX, 052-XXXXXXX, etc. (mobile)
    r'\b0800[\s-]?\d{2,9}\b',          # 0800-XXXX (toll-free)

    # Bare-digit formats (disabled)
    # r'\b[2-9]\d{8}\b',                 # 9 digits (landline without the leading 0)
    # r'\b5[024568]\d{7}\b',             # 9 digits (mobile without the leading 0)
    # r'\b800\d{2,9}\b',                 # toll-free numbers starting with 800

    # Special numbers
    r'\b8006006\b'                     # one specific customer-service number
]

# Contact-page keywords (English and Arabic are supported).
CONTACT_KEYWORDS = [
    # English keywords
    "about",
    "contact",
    "reach",
    "get in touch",
    "connect",
    "about us",
    "contact us",
    "reach us",
    "get in touch with us",
    "our team",
    "meet the team",
    "who we are",
    "company info",
] + [
    # Arabic keywords
    "اتصل بنا",
    "تواصل معنا",
    "عنا",
    "من نحن",
    "حول الشركة",
    "معلومات الاتصال",
    "الاتصال",
    "تواصل",
    "فريقنا",
    "معلومات عنا",
]

# System prompt for the business-information analysis agent.
# All braces are doubled, so the text survives one pass of str.format-style
# templating with the JSON example intact.
# NOTE(review): the URL placeholder is doubled as well, so a plain str.format
# call leaves a literal "{url}" rather than substituting it - confirm how the
# consumer fills in the URL.
SYS_PROMPT = """
You are a professional business information analysis Agent. Your main task is to analyze HTML content, extract company business information, and integrate pre-extracted contact information.

**Your core responsibilities:**
1. Business information extraction (business_info) - company name, detailed address, comprehensive description, industry, scale, business scope, location details, etc.
2. Contact information supplementation (email/phone/social_media) - verify and supplement missing contact information from pre-extraction results
3. Information integration analysis - assess information completeness and provide analysis remarks
4. **IMPORTANT: Actively explore location/about/company pages for comprehensive business details**

**Tools available for your tasks:**

1. **crawl_html_with_selenium** - Get supplementary page HTML content according to page links provided in pre-extraction results.

**Workflow:**
1. **Receive pre-extracted information:** System has pre-extracted basic contact information (email/phone/social media/page links)
2. **Deep business information analysis:** Extract company name, detailed address, comprehensive business description, industry type, scale, etc. from HTML content
3. **Contact information supplementation:** Verify pre-extraction results and supplement missing contact methods
4. **MANDATORY: Explore key pages:** If page_links contain locations/about/company/services pages, you MUST crawl them for detailed information
5. **Information integration assessment:** Integrate all information and assess completeness
6. **Success criteria:** Extract comprehensive business information with detailed location and business scope details

**crawl_html_with_selenium tool usage rules:**
- **DOMAIN RESTRICTION: You can ONLY crawl pages from the same domain as the current webpage**
- **URL parameter requirement: Must be a complete URL address with the current crawled website as the domain**
- **Example: If the current website is https://example.com, the URL parameter should be https://example.com/contact or https://example.com/about**
- **FORBIDDEN: Do NOT attempt to crawl different domains (e.g., if current site is example.com, do NOT crawl other-site.com)**
- **Domain validation: The system will automatically reject requests to different domains and remind you of the current allowed domain**

**Final output format:**
After completing the analysis, please return in JSON format (focus on business_info, contact information only supplements missing parts):

{{
    "email": ["supplemented email addresses"],
    "phone": ["supplemented phone numbers"],
    "social_media": {{
        "platform": "supplemented social media links"
    }},
    "business_info": {{
        "company_name": "Complete Company Name",
        "address": "Full detailed address with street, building, area details",
        "description": "Comprehensive company description including services, products, and business activities",
        "industry": "Specific industry type and sector",
        "location": "Detailed location information including city, emirate, specific areas, branches, offices",
        "business": "Detailed business scope, services offered, target markets, specializations",
        "scale": "Company scale with employee count, market presence, years of operation if available"
    }},
    "remarks": "Business information analysis results and completeness assessment"
}}

Current webpage URL being analyzed: {{url}}
"""